misc/extr_PF_ActNuit (1).R

library(data.table)
library(StreamMetabolism)

if(!exists("args")){args=""}
Tri=T
WDF=F
#args[3]=50
#args[4]="DataLP_PF_exportMyoGT.txt"
#Tri=F
#WDF=T

memory.limit(3210241024*1024)

#ETAPE 0 - IMPORT DES TABLES
#bien renommer les chemins en fonction de l'ordi utilisé

#table "données"
Sys.time()
DataLP=fread(args[4]) # 1e5 lines / sec
Sys.time()

#table "seuils"
#RefSeuils=fread("Referentiel_seuils_tabase3HF_1015France_IdConc_Car.csv")


#table "espèces"
GroupList=fread("SpeciesList.csv") 

LatMin=0
LatMax=90
LongMin=-180
LongMax=180


#pour afficher les milisecondes
op <- options(digits.secs=3)
#pour reset
#options(op)

if(Tri)
{
  #ETAPE 0 - tri des participations foireuses (durée séquence Pip)
  #A FAIRE : tri sur le sampling rate
  Sys.time()
  DataPip=subset(DataLP,substr(DataLP$espece,1,3)=="Pip") #3 sec
  Sys.time()
  DurSeq=DataPip$temps_fin-DataPip$temps_debut
  Q90Pip=aggregate(DurSeq,by=list(DataPip$participation,DataPip$DataMicFinal)
                   ,FUN=function(x) quantile(x,0.9))
  SelQ90Pip=subset(Q90Pip,Q90Pip$x>4.3)
  Sys.time()
  test=match(paste(DataLP$participation,DataLP$DataMicFinal)
             ,paste(SelQ90Pip$Group.1,SelQ90Pip$Group.2)) # 6 sec
  Sys.time()
  DataLP=subset(DataLP,is.na(test)==F)
  Sys.time()
}

#ETAPE 1 - formattage des tables et de leurs attributs
#ajout des infos temps relatifs / sunrise-sunset
Sys.time()
LLJour=unique(cbind(DataLP$latitude,DataLP$longitude,DataLP$DateJour)) # 5 min
Sys.time()
#DateSrst=format(as.Date(LLJour[,3],origin = "1970-01-01"),format="%Y/%m/%d")
DateLP=format(as.Date(LLJour[,3]),format="%Y-%m-%d")
Sys.time()
Srst=mapply(sunrise.set,as.numeric(LLJour[,1]),as.numeric(LLJour[,2]),DateLP) #50 sec
Sys.time()
SrstD=as.data.frame(t(Srst))
DataSrst=as.data.frame(cbind(LLJour,SrstD))
colnames(DataSrst)=c("latitude","longitude","DateJour","sunrise","sunset")
DataSrst$latitude=as.numeric(as.character(DataSrst$latitude))
DataSrst$longitude=as.numeric(as.character(DataSrst$longitude))


gc()
Sys.time()
DataLPS=merge(DataLP,DataSrst,by=c("latitude","longitude","DateJour"))
Sys.time()
rm(DataLP)

Sys.time()
Decst=DataLPS$TempsEnregistrement2-as.numeric(DataLPS$sunset)
Sys.time()
#recale par rapport au coucher de soleil du bon jour
DecstP=Decst+3600*24*(Decst<(-6*3600))  # 2 min
Sys.time()
DataLPS[,DecstP:=DecstP]
Sys.time()

Decsr=as.numeric(DataLPS$sunrise)-DataLPS$TempsEnregistrement2
#recale par rapport au lever de soleil du bon jour
DecsrP=Decsr+3600*24*(Decsr<(-6*3600)) # 2 min
Sys.time()
DataLPS[,DecsrP:=DecsrP]
Sys.time()

if(exists("RefSeuils"))
{
#merge avec espèce pour tri selon seuil
#simplifie la table groupe pour ne pas alourdir la grosse table Data...
#simplifie la table groupe pour ne pas alourdir la grosse table Data...
GroupSimpl=data.frame(espece=GroupList$Esp,nom=GroupList$`Scientific name`
                      ,groupe=GroupList$Group)
GroupRef=merge(GroupSimpl,RefSeuils,by.x="espece",by.y="Espece")

Sys.time()
DataLPSG=merge(DataLPS,GroupRef,by="espece")
Sys.time()
test=match(DataLPS$espece,GroupRef$espece)
SpManquante=subset(DataLPS,is.na(test))
table(SpManquante$espece)
rm(DataLPS)


#ColS=match(args[3],colnames(DataLPSG))
Sys.time()
ColSeuil=match(args[3],names(DataLPSG))
Fiable=(DataLPSG$probabilite>DataLPSG[,..ColSeuil])
Sys.time()
table(Fiable,DataLPSG$espece)
Sys.time()
DataFiable=subset(DataLPSG,as.logical(Fiable)) # 10 sec
Sys.time()
rm(DataLPSG) # 30 sec
#test=DataFiable[1:100000,]
}else{
  DataFiable=subset(DataLPS,DataLPS$probabilite>(as.numeric(args[3])/100)) # 10 sec
}

if(WDF)
{
  fwrite(DataFiable,paste0("S_",args[4]))
}



Sys.time()
DataPF_ActNuit=aggregate(DataFiable$donnee
                         ,by=list(DataFiable$participation
                                  ,DataFiable$DateNuit
                                  ,DataFiable$DataMicFinal
                                  ,DataFiable$groupe
                                  ,DataFiable$espece
                         )
                         ,FUN=length) # 15 min
Sys.time()
DataPF_MinSt=aggregate(DataFiable$DecstP
                       ,by=list(DataFiable$participation
                                ,DataFiable$DateNuit
                                ,DataFiable$DataMicFinal
                                ,DataFiable$groupe
                                ,DataFiable$espece
                       )
                       ,FUN=min)
Sys.time()
DataPF_MinSr=aggregate(DataFiable$DecsrP
                       ,by=list(DataFiable$participation
                                ,DataFiable$DateNuit
                                ,DataFiable$DataMicFinal
                                ,DataFiable$groupe
                                ,DataFiable$espece
                       )
                       ,FUN=min)
Sys.time()
DataPF_SpNuit=cbind(DataPF_ActNuit,DataPF_MinSt$x,DataPF_MinSr$x)
colnames(DataPF_SpNuit)=c("participation","Nuit","num_micro","groupe","espece"
                          ,"nb_contacts","min_decalage_coucher","min_decalage_lever")



fwrite(DataPF_SpNuit,paste0("SpNuit",args[4]))

Sys.time()
DataDMinSr=aggregate(DataFiable$DecsrP
                     ,by=list(DataFiable$participation
                              ,DataFiable$DateNuit
                              ,DataFiable$DataMicFinal)
                     ,FUN=min) # 20 sec
Sys.time()
DataDMaxSr=aggregate(DataFiable$DecsrP
                     ,by=list(DataFiable$participation
                              ,DataFiable$DateNuit
                              ,DataFiable$DataMicFinal)
                     ,FUN=max) # 20 sec
Sys.time()
DataDMinSt=aggregate(DataFiable$DecstP
                     ,by=list(DataFiable$participation
                              ,DataFiable$DateNuit
                              ,DataFiable$DataMicFinal)
                     ,FUN=min)
Sys.time()
DataDMaxSt=aggregate(DataFiable$DecstP
                     ,by=list(DataFiable$participation
                              ,DataFiable$DateNuit
                              ,DataFiable$DataMicFinal)
                     ,FUN=max)
Sys.time()
DataDecPNM=cbind(DataDMinSr,DataDMaxSr$x,DataDMinSt$x,DataDMaxSt$x)
colnames(DataDecPNM)=c("participation","Nuit","num_micro"
                       ,"decalage_fin_lever"
                       ,"decalage_debut_lever"
                       ,"decalage_debut_coucher"
                       ,"decalage_fin_coucher")

DataPF_SpNuit2=merge(DataPF_SpNuit,DataDecPNM
                     ,by=c("participation","Nuit","num_micro"))

fwrite(DataPF_SpNuit2,paste0("SpNuit2_",args[3],"_",args[4]))
cesco-lab/Vigie-Chiro_scripts documentation built on April 4, 2024, 4:27 a.m.